import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from IPython.display import HTML

# Load the dataset, derive `ds` (a datetime column parsed from `date`) and
# order the rows chronologically.
# NOTE(review): the parse format assumes `date` holds day/month/year strings;
# confirm against the CSV, since to_datetime raises on a mismatch.
input_file = 'owid-covid-data.csv'
df = (
    pd.read_csv(input_file)
    .assign(ds=lambda frame: pd.to_datetime(frame['date'], format="%d/%m/%Y"))
    .sort_values(by=['ds'])
)

Explore data

# Locations that are aggregates rather than individual countries; they are
# removed so the sunburst only contains country-level rows.
valuesToDrop = [
    'Asia', 'World', 'International', 'European Union', 'Europe',
    'North America', 'Africa', 'South America', 'Oceania',
]

# Keep country rows only, then discard rows without a `new_cases` value.
df1 = df[~df['location'].isin(valuesToDrop)].dropna(subset=['new_cases'])

# Sunburst with continents as the inner ring and locations as the outer ring;
# `new_cases` drives both the sector size and the colour.
cases_dist = px.sunburst(
    df1,
    path=['continent', 'location'],
    values='new_cases',
    color='new_cases',
    color_continuous_scale=px.colors.sequential.Magenta,
    title='Covid cases distribution',
)

Plotting a choropleth map of new COVID-19 cases

# Work on a separate frame whose `ds` column is converted to strings before
# it is used as the animation frame key below.
df2 = df.assign(ds=df['ds'].astype(str))

# Animated world map: one frame per `ds` value, regions keyed by `iso_code`
# and shaded by `new_cases`.
fig = px.choropleth(
    df2,
    locations='iso_code',
    color='new_cases',
    hover_name='location',
    animation_frame="ds",
    color_continuous_scale=px.colors.sequential.Reds,
)

Time Series Analysis

# Build the Prophet training frame: one row per date (`ds`) with the summed
# `new_cases` exposed as `y`.
# Aggregate locations listed in `valuesToDrop` (continents, 'World', ...) are
# excluded first: summing them together with the individual countries would
# count the same cases more than once and inflate the series.
# NOTE(review): assumes `valuesToDrop` names every aggregate row present in
# the CSV - confirm against the dataset.
df4 = (
    df[~df['location'].isin(valuesToDrop)]
    .groupby(by=['ds'])['new_cases']
    .sum()
    .reset_index()
    .sort_values(by='ds', ascending=True)
    .rename(columns={'new_cases': 'y'})
)
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly, add_changepoints_to_plot

# Baseline model: default Prophet settings, fitted on the daily totals in df4.
model = Prophet()
model.fit(df4)

# Extend the fitted date range by 365 periods and forecast over the whole
# (historical + future) frame.
future = model.make_future_dataframe(periods=365)
forecast = model.predict(future)
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Initial log joint probability = -9.54837
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      70       938.957    0.00157276       128.333   1.907e-05       0.001      138  LS failed, Hessian reset 
      99       939.815   0.000128232       50.4087      0.7411      0.7411      177   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     149       940.955    0.00105143       72.7678   9.419e-06       0.001      274  LS failed, Hessian reset 
     199       941.504   1.17708e-05       69.7156      0.3262      0.3262      347   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     249       941.506   5.23168e-08        77.369      0.2579           1      423   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance
# Plot the baseline forecast, then overlay the changepoints on the same axes.
# Note: this rebinds `fig` to the figure returned by model.plot.
fig = model.plot(forecast)
a = add_changepoints_to_plot(ax=fig.gca(), m=model, fcst=forecast)

Hyperparameter tuning

# Tuned model: weekly seasonality switched off, with explicit changepoint
# prior scale and changepoint range.
m = Prophet(
    changepoint_range=0.8,
    changepoint_prior_scale=0.4,
    weekly_seasonality=False,
)
m.fit(df4)

# Forecast 365 periods past the end of the training data.
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)

# Large-format forecast plot with readable title, axis labels and ticks.
fig1 = m.plot(forecast, figsize=(20, 12))
ax = fig1.gca()
ax.set_title("Covid cases projection", fontsize=24)
ax.set_xlabel('Date', fontsize=20)
ax.set_ylabel('Cases', fontsize=20)
ax.tick_params(axis="both", labelsize=18)
# Show case counts as plain numbers instead of scientific notation.
ax.yaxis.get_major_formatter().set_scientific(False)
INFO:prophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
Initial log joint probability = -9.54837
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       991.673     0.0359472       30.5977           1           1      124   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     197       1006.66   0.000135233       8.33226   6.068e-06       0.001      282  LS failed, Hessian reset 
     199       1006.69    0.00582461       19.6152           1           1      285   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299       1010.11    0.00610452       27.3128      0.1417      0.8632      402   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     397       1012.49   0.000425993       34.5145   2.078e-05       0.001      563  LS failed, Hessian reset 
     399       1012.59    0.00733377       11.8145           1           1      566   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     438       1013.93    0.00037775       24.5641   2.372e-05       0.001      654  LS failed, Hessian reset 
     499       1014.52      0.020022        17.107      0.4228           1      736   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     571       1015.17   0.000224349       15.6829   6.343e-06       0.001      860  LS failed, Hessian reset 
     599       1015.27    0.00190556       12.9943           1           1      896   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     654       1015.51   0.000144255       9.44615   5.975e-06       0.001     1000  LS failed, Hessian reset 
     699       1015.65    0.00689767       7.94546           1           1     1062   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     722       1015.66   8.60491e-05       7.39676   1.042e-05       0.001     1127  LS failed, Hessian reset 
     796       1015.69   4.43309e-05       6.53984   6.232e-06       0.001     1264  LS failed, Hessian reset 
     799       1015.69   7.05809e-06       4.81613      0.5934      0.5934     1268   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     841       1015.69   5.87928e-05       9.38292   6.446e-06       0.001     1372  LS failed, Hessian reset 
     862       1015.69   9.62217e-07       5.84872     0.08992      0.9129     1401   
Optimization terminated normally: 
  Convergence detected: relative gradient magnitude is below tolerance